All of the following graphs and statistics are based on the results of the LimeSurvey online pre-test. 40 participants filled out the online pre-test; groups of 20 saw the same set of sentences, and each participant saw them in a different pseudo-randomized order.
# Load the stimulus list. Fields are ";"-separated; empty strings and "NAN"
# count as missing. UTF-8 is declared explicitly (stimuli contain umlauts).
# FIX: spell out `header` -- the original `head=TRUE` only worked via
# partial argument matching.
stimuli <- read.csv(file = "allstimuli_new.csv", header = TRUE, sep = ";",
                    na.strings = c("", "NAN"), stringsAsFactors = FALSE,
                    encoding = "UTF-8")
# Unique item identifier pasted from raw columns 13 and 3 (after the rename
# below these positions hold "Attachment" and "ID", e.g. "VA249").
# NOTE(review): positional indexing is fragile if the CSV layout changes --
# TODO confirm the raw column order.
stimuli$fullID <- as.character(interaction(stimuli[, c(13, 3)], sep = ""))
# "Condition" is the raw column name before the rename below.
stimuli$Condition <- as.factor(stimuli$Condition)
colnames(stimuli) <- c("Type", "Pair", "ID", "Det.", "N0", "Verb", "Det.2",
                       "N1", "Prep.", "Det.3", "Adj.", "N2", "Attachment",
                       "Unambiguous", "verb_number", "fullID")
#load response file
# One CSV per session ("results*"); combine them with a full outer join so
# sessions with differing column sets are kept (missing cells become NA).
file_names <- list.files(pattern = "^results", path = "allresponses",
                         full.names = TRUE)
# NOTE(review): stringsAsFactors = TRUE here vs FALSE for the stimuli file;
# presumably intentional for the response labels -- TODO confirm.
temp <- lapply(file_names, read.csv, sep = ",", na.strings = c("", "NAN"),
               stringsAsFactors = TRUE)
df.responses <- Reduce(function(x, y) merge(x, y, all = TRUE, sort = TRUE), temp)
#transpose and fix column names and classes
# Convert to data.table in place, keeping row names (the subject IDs) as the
# first column; rename that column by reference (data.table idiom instead of
# the base `colnames<-` replacement).
df.responses <- setDT(df.responses, keep.rownames = TRUE)
setnames(df.responses, 1, "subjects")
# NOTE(review): Filter() subsets with a logical vector of length ncol;
# verify this drops blank *columns* on a data.table, and that all-NA
# columns (produced by na.strings) are caught by the `== ""` test -- TODO.
df.responses <- Filter(function(x) !(all(x=="")), df.responses) #delete blank columns
The data were subsequently split into subject information (demographics, etc.) and responses to the items.
#Split all item specific data from subject specific data
# Item columns contain an attachment code followed by a digit
# (e.g. "VA249", "NA21Time", "PVA1Time"); everything else is subject metadata.
ind_items <- grep("[VN]A[0-9]", colnames(df.responses), value = TRUE)
# df.responses is a data.table (setDT above), so plain assignment would
# alias it; take an explicit copy before deleting the item columns by
# reference so df.responses cannot be touched.
df.subjectinfo <- copy(df.responses)
df.subjectinfo[, (ind_items) := NULL]
# Keep only the subject ID plus the item columns for the response table.
df.responses <- df.responses[, c("subjects", ind_items), with = FALSE]
#reshape items so that each row contains info for items per subject (items repeat over rows)
# The wide table is melted into four long tables -- attachment choice, its
# reaction time, plausibility rating, and its reaction time -- which are then
# recombined column-wise. NOTE(review): the cbind below pairs rows purely by
# position; this is only correct because grep() preserves the original column
# order, so all four melts enumerate subjects x items identically -- TODO
# confirm the raw export keeps the four column families in the same order.
# Attachment responses: columns named like "VA249" / "NA21".
ind <- grep("^[VN]A[0-9]*$",colnames(df.responses),value = TRUE)
df.respAttach <- df.responses[,c("subjects",ind),with=FALSE]
df.respAttach <- melt(df.respAttach,id="subjects",value.name="response_attachment",variable.name='items')
# Reaction times for the attachment decision: "VA249Time" etc.
ind <- grep("^[VN]A[0-9]*Time$",colnames(df.responses),value = TRUE)
df.respTime <- df.responses[,c("subjects",ind),with=FALSE]
df.respTime <- melt(df.respTime,id="subjects",value.name="rt_attachment",variable.name='items')
# Plausibility ratings: presumably the rating columns end in "." in the raw
# Limesurvey export -- TODO confirm this pattern matches nothing else.
ind <- grep("\\.$",colnames(df.responses),value = TRUE)
df.respPAttach <- df.responses[,c("subjects",ind),with=FALSE]
df.respPAttach <- melt(df.respPAttach,id="subjects",value.name="rating_plausibility",variable.name='items')
# Reaction times for the plausibility rating: "PVA249Time" etc.
ind <- grep("P[VN]A[0-9]*Time$",colnames(df.responses),value = TRUE)
df.respPTime <- df.responses[,c("subjects",ind),with=FALSE]
df.respPTime <- melt(df.respPTime,id="subjects",value.name="rt_plausibility",variable.name='items')
# Stitch the four value columns together (positional -- see note above).
df.responses <- cbind(df.respAttach,df.respTime[,3],df.respPAttach[,3],df.respPTime[,3])
# A "hit" is a response matching the item's designed attachment: items whose
# ID contains "N" expect the answer "Nomen" (German: noun), "V" items expect
# "Verb". Coded 1/0 (NA response stays NA).
df.responses$hits <- as.numeric((grepl("N",df.responses$items) & df.responses$response_attachment == "Nomen") |
(grepl("V",df.responses$items) & df.responses$response_attachment == "Verb"))
# Designed attachment of each item, derived from the item ID.
df.responses <- df.responses %>%
mutate(attachment = ifelse(grepl("N",items),"Noun","Verb"))
Age of participants:
# Five-number summary (plus mean) of participant age, in years.
summary(df.subjectinfo$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 22.00 25.00 25.16 27.00 33.00
Time taken to complete pre-test (in minutes):
# interviewtime is in seconds (divided by 60 to report minutes).
summary((df.subjectinfo$interviewtime)/60)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23.36 31.54 33.73 36.57 40.16 80.20
Outlier subjects were identified according to three scores:
1. Subjects who have more than one incorrect test item
2. Subjects who have less than 60% correct for the unambiguous items
3. Subjects whose average reaction time diverges extremely from the group average
#average accuracy of unambiguous items
# Select the unambiguous items and display their sentence material
# (columns 4-12 hold the constituents Det. ... N2).
is_unambiguous <- stimuli$Unambiguous == 1
subset_unambiguous <- stimuli$fullID[is_unambiguous]
sentences_unambiguous <- stimuli[is_unambiguous, 4:12]
sentences_unambiguous
# Exclusion thresholds for the outlier criteria listed above.
least_correct <- 3         # minimum number of correct test items
threshhold_acc <- 0.60     # minimum accuracy on unambiguous items
# Mean attachment-decision RT per subject across all items.
avg_rts <- df.responses %>%
  group_by(subjects) %>%
  summarise(mean_rt = mean(rt_attachment, na.rm = TRUE))
# Boxplot outliers (values beyond 1.5 * IQR) among the per-subject means.
outlier <- boxplot.stats(avg_rts$mean_rt)$out
# Interactive boxplot of the per-subject means. The non-standard "subject"
# aesthetic exists only so plotly can show the subject ID in the tooltip.
p <- ggplot(avg_rts, aes(x = factor(0), y = mean_rt, subject = subjects)) +
  geom_boxplot() +
  geom_jitter(size = 2) +
  ggtitle("Average reaction times per subject across all items")
p <- ggplotly(p, tooltip = "subjects")
p
# Number of correctly answered test items per subject: the exported
# "correct*" columns are assumed non-NA only for correct answers --
# TODO confirm against the Limesurvey export format.
test_correct <- df.subjectinfo %>%
  select(subjects, starts_with("correct")) %>%
  mutate(number_correct = rowSums(!is.na(.[, 2:ncol(.)]))) %>%
  select(subjects, number_correct)
# Per-subject accuracy on the unambiguous items only.
avg_unambiguous <- df.responses %>%
  filter(items %in% subset_unambiguous) %>%
  group_by(subjects) %>%
  summarise(acc_unambiguous = mean(hits, na.rm = TRUE))
# One summary row per subject with all three outlier scores.
summ <- join_all(list(avg_rts, test_correct, avg_unambiguous), by = 'subjects')
DT::datatable(summ) %>% formatRound(columns = colnames(summ), digits = 2)
# Flag RT outliers. boxplot.stats()$out returns the outlying values taken
# directly from avg_rts$mean_rt, so exact membership is safe; the original
# rounded both sides to 2 digits, which could mis-flag a non-outlier whose
# rounded mean happens to coincide with an outlier's.
outlier_indx <- avg_rts$subjects[avg_rts$mean_rt %in% outlier]
# Add subjects failing either accuracy criterion.
outlier_indx <- c(outlier_indx,
                  summ$subjects[which(summ$number_correct < least_correct |
                                        summ$acc_unambiguous < threshhold_acc)])
outlier_indx <- unique(outlier_indx)
#Remove Outlier
# Drop every response row belonging to a flagged outlier subject.
df.responses <- filter(df.responses, !(subjects %in% outlier_indx))
Number of removed outliers: 2
# Binomial probability of k correct answers out of 20, assuming 50% chance
# performance per item.
x <- seq(0, 20, by = 1)
y <- dbinom(x, size = 20, prob = 0.5)
#plot(x,y,main="Probability distribution assuming per item probability of 50%",
#     xlab="Number of correct answers per items (out of 20 answers)", ylab = "Probability")
# Per-item accuracy threshold for keeping an item.
threshold <- 0.72
# Per-item accuracy and mean plausibility rating (outliers already removed).
acc_per_item <- df.responses %>%
  group_by(items) %>%
  summarise(acc = mean(hits, na.rm = TRUE),
            plaus = mean(rating_plausibility, na.rm = TRUE))
# Grand mean accuracy over all items.
mean_accuracy <- acc_per_item %>%
  summarise(mean_correct = mean(acc, na.rm = TRUE))
# Attach the per-item scores to the stimulus metadata and round for display.
df.stim_acc <- merge(stimuli, acc_per_item, by.x = "fullID", by.y = "items")
df.stim_acc <- df.stim_acc %>%
  mutate(acc = round(acc, digits = 2),
         plaus = round(plaus, digits = 2))
# Items below the accuracy threshold, sorted by verb for inspection.
items_reject <- df.stim_acc %>% filter(acc < threshold) %>% arrange(Verb)
#datatable(items_reject[,c(1,2,4,6:13,17)],
#          filter = 'top',
#          options = list("pageLength" = 10))
datatable(items_reject[,c(1,2,4,6:13,17,18)])
#first reject items based on accuracy threshold with their paired items
# Rejecting an item removes its counterpart too (same ID = same pair).
pairs_reject <- df.stim_acc[df.stim_acc$ID %in% items_reject$ID,]
df.responses_clean <- df.responses %>%
  filter(!(items %in% pairs_reject$fullID))
#Then check if all verbs still occur twice (also for type 1 items) and reject remaining pair.
# Verbs that still occur in more than one remaining item:
pairs_remain <- df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,]
pairs_remain <- unique(pairs_remain[duplicated(pairs_remain$verb_number),"verb_number"])
# Items whose verb now occurs only once are rejected together with their pair.
items_reject2 <- df.stim_acc[!(df.stim_acc$verb_number %in% pairs_remain),]
pairs_reject2 <- df.stim_acc[df.stim_acc$ID %in% items_reject2$ID,]
df.responses_clean <- df.responses_clean %>%
  filter(!(items %in% pairs_reject2$fullID))
# FIX: the column is "items", not "item" -- data.table's $ does no partial
# matching, so $item silently returned NULL and the table came out empty.
datatable(df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,c(1,2,4,6:13,17,18)])
#manually reject superfluous items
items_reject3 <- c("VA249","NA249","NA21","VA21","NA243","VA243","NA2104","VA2104","NA247","VA247","NA288","VA288","NA255","VA255","NA237","VA237","NA22","VA22","NA25","VA25","NA24","VA24","NA272","VA272","NA229","VA229","NA286","VA286","NA287","VA287","NA275","VA275","NA278","VA278","NA253","VA253","NA120","VA120","NA133","VA133","NA125","VA125","NA183","VA183","NA188","VA188","NA1101","VA1101","NA147","VA147")
df.responses_clean <- df.responses_clean %>%
  filter(!(items %in% items_reject3))
# FIX: same "item" -> "items" column-name correction as above.
datatable(df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,c(1,2,4,6:13,17,18)])
# There are 100 items of Type 1 left.
# (FIX: this sentence had been fused onto the assignment below during
# extraction, which made the line invalid R.)
# Recompute per-item accuracy on the cleaned response set.
acc_per_item <- df.responses_clean %>%
  group_by(items) %>%
  summarise(acc = mean(hits, na.rm = TRUE))
mean_accuracy <- acc_per_item %>%
  summarise(mean_correct = mean(acc, na.rm = TRUE))
df.stim_acc <- merge(stimuli, acc_per_item, by.x = "fullID", by.y = "items")
df.stim_acc$acc <- round(df.stim_acc$acc, digits = 2)
# Accuracy difference within each pair (same ID): lag() operates within the
# group_by(ID) groups, so the first row of each pair is NA and the second
# holds acc(second) - acc(first).
diff_acc <- df.stim_acc %>% group_by(ID) %>% arrange(ID) %>% mutate(diff_acc = acc - lag(acc))
# The extra aesthetics (N0, Verb, ...) exist only to feed the plotly tooltip.
p <- ggplot(diff_acc, aes(Type, diff_acc, colour = Type, N0 = N0, Verb = Verb,
                          N1 = N1, Prep = Prep., Adj = Adj., N2 = N2)) +
  geom_boxplot() +
  geom_jitter(size = 2) +
  ggtitle("difference in accuracy within pairs: contrast Verb-Noun ") +
  labs(y = "difference in accuracy (%)", colour = "Type") +
  scale_x_discrete(labels = c("Type 1: differ in Verb", "Type 2: differ in Noun order"))
p <- ggplotly(p, tooltip = c('N0', 'Verb', 'N1', 'Prep', 'Adj', 'N2'))
p
# Per-subject mean RT, split by designed attachment and hit/miss.
# FIX: na.rm = TRUE added for consistency with every other aggregate in this
# file -- without it a single missing RT turns a subject's cell mean into NA
# and silently drops that point from the plot.
summary_rts <- df.responses_clean %>%
  group_by(subjects, attachment, hits) %>%
  summarise(mean_rt = mean(rt_attachment, na.rm = TRUE))
p <- ggplot(data = subset(summary_rts, !is.na(hits)),
            aes(x = factor(hits), y = mean_rt, subject = subjects, fill = factor(hits))) +
  geom_boxplot() +
  facet_wrap(~attachment) +
  geom_jitter(size = 2) +
  ggtitle("RTs averaged over items") +
  # FIX: the discrete x axis has exactly two levels (0 = miss, 1 = hit), so
  # the label vector must have length 2 (the original supplied four labels,
  # which modern ggplot2 rejects).
  scale_x_discrete(labels = c("miss", "hit"))
p <- ggplotly(p, tooltip = c("subject", "mean_rt"))
p
### 3c. Accuracy for Verb and Noun attached items
# Per subject: how often each attachment response ("Verb"/"Nomen") was
# chosen, as a share of that subject's non-missing responses. summarise()
# drops the innermost grouping level, so the sum(count) in mutate() runs
# per subject -- freq is the within-subject response proportion.
attachment_freq <- df.responses_clean %>%
na.omit %>%
group_by(subjects,response_attachment) %>%
summarise(count=n())%>%
mutate(freq = count/sum(count))
#summarise(percent = length(response_attachment)/162)
# Per-response-type subsets; computed but not displayed in this section.
percent_VAresponses <- attachment_freq[attachment_freq$response_attachment=="Verb",]
percent_NAresponses <- attachment_freq[attachment_freq$response_attachment=="Nomen",]
#ggplot(attachment_percent, aes(percent, fill = response_attachment)) +
#  geom_density(alpha = 0.2)
# Overlaid density-scaled histograms of the per-subject response shares.
ggplot(attachment_freq, aes(freq, fill = response_attachment)) + geom_histogram(alpha = 0.5, aes(y = ..density..), position = 'identity')
# Count of each plausibility rating, split by designed attachment; rows with
# a missing rating are dropped after counting.
plausibility_counts <- df.responses_clean %>%
group_by(attachment) %>%
count(rating_plausibility) %>% na.omit()
# Side-by-side bars of rating counts per attachment type.
ggplot(plausibility_counts,aes(rating_plausibility,n,fill=attachment))+
geom_bar(stat="identity",position=position_dodge())
# Group RTs by plausibility ratings and attachment
# NOTE(review): no na.rm here -- a single missing RT makes the item's cell
# mean NA; the original author's comment also questioned whether a mean is
# appropriate -- TODO confirm intended.
summary_rts <- df.responses_clean %>%
group_by(items,attachment,rating_plausibility) %>%
summarise(mean_rt = mean(rt_attachment)) # NOTE(review): mean per item x rating; verify this is the intended aggregate
# Boxplots of per-item mean RT within each plausibility bin, faceted by
# attachment; "items" aesthetic only feeds the plotly tooltip.
p <- ggplot(summary_rts, aes(x = factor(rating_plausibility), y = mean_rt,items = items,fill=factor(rating_plausibility))) +
geom_boxplot() +
facet_wrap(~attachment) +
ggtitle("RT distribution over items per plausibility bin")
p <- ggplotly(p,tooltip = c("items","mean_rt"))
p